github: https://github.com/avven1re/SDA_GroupAssignment

#devtools::install_github("amices/ggmice")
library(tidyverse)
library(mice)
library(ggmice)
library(psych)
library(visdat)

#Input Data
ess <- readRDS("Ess round 9.RDS")

Create a function to find the columns full of NAs

#Find the column full of NAs
findNACol <- function(data){
  ind_vec <- c()
  j <- 1
  for (i in 1 : length(data[1, ])) {
    if(sum(is.na(data[, i])) == length(data[, i])){
      ind_vec[j] <- i
      j <- j + 1
    }
  }
  return(ind_vec)
}

Cutting the whole dataset by countries and get rid of NA columns

cutd <- function(data = ess){
  cntrynames <- names(table(data$cntry))
  num_cntry <- length(cntrynames)
  cntrydata_list <- list()
  for (k in 1 : num_cntry) {
    cntry <- filter(data, cntry == cntrynames[k])
    index <- findNACol(cntry)
    processed <- cntry[, -index]
    
    cntrydata_list[[k]] <- processed
  }
  
  names(cntrydata_list) <- cntrynames
  return(cntrydata_list)
}

cntrydatalist <- cutd(ess)

We can see that each country has about 300~340 variables:

summary(cntrydatalist)[, 1:2]
##    Length Class     
## AT 323    data.frame
## BE 329    data.frame
## BG 323    data.frame
## CH 326    data.frame
## CY 321    data.frame
## CZ 314    data.frame
## DE 335    data.frame
## EE 324    data.frame
## ES 328    data.frame
## FI 323    data.frame
## FR 321    data.frame
## GB 339    data.frame
## HR 337    data.frame
## HU 344    data.frame
## IE 326    data.frame
## IT 321    data.frame
## LT 323    data.frame
## LV 330    data.frame
## ME 335    data.frame
## NL 338    data.frame
## NO 320    data.frame
## PL 335    data.frame
## PT 328    data.frame
## RS 332    data.frame
## SE 324    data.frame
## SI 327    data.frame
## SK 338    data.frame

And now we get a list of each countries dataset and also remove the NA columns.

Here we can draw a barplot of the percentage every countries’ missing values

n_mvec <- matrix(NaN, nrow = 1, ncol = length(cntrydatalist))
for (i in 1 : length(cntrydatalist)) {
  n_mvec[i] <- sum(is.na(cntrydatalist[[i]])) / (dim(cntrydatalist[[i]])[1] * dim(cntrydatalist[[i]])[2]) * 100
}
colnames(n_mvec) <- names(cntrydatalist)
barplot(n_mvec, cex.names = 0.5, ylim = c(0, 0.8), main = "The missing value percentage in each country (%)")

Take NL for example, lets take a look of which variables include missing values:

which(colSums(is.na(cntrydatalist$NL))>0)
##  eisced   wkhct  wkhtot eiscedp wkhtotp eiscedf eiscedm   inwtm 
##     188     216     218     232     251     253     258     331
m_NL_n <- names(which(colSums(is.na(cntrydatalist$NL))>0))
m_NL_n
## [1] "eisced"  "wkhct"   "wkhtot"  "eiscedp" "wkhtotp" "eiscedf" "eiscedm"
## [8] "inwtm"

From the ESS9 codebook, these variables are:

eisced: Highest level of education

##      eisced      
##  Min.   : 1.000  
##  1st Qu.: 2.000  
##  Median : 3.000  
##  Mean   : 4.442  
##  3rd Qu.: 6.000  
##  Max.   :55.000  
##  NA's   :2

wkhct: Total contracted hours per week in main job overtime excluded

##      wkhct      
##  Min.   : 0.00  
##  1st Qu.:24.00  
##  Median :36.00  
##  Mean   :30.24  
##  3rd Qu.:40.00  
##  Max.   :85.00  
##  NA's   :289

wkhtot: Total hours normally worked per week in main job overtime included

##      wkhtot      
##  Min.   :  0.00  
##  1st Qu.: 24.00  
##  Median : 36.00  
##  Mean   : 34.67  
##  3rd Qu.: 44.00  
##  Max.   :120.00  
##  NA's   :87

eiscedp: Partner’s highest level of education

##     eiscedp      
##  Min.   : 1.000  
##  1st Qu.: 2.000  
##  Median : 3.000  
##  Mean   : 4.643  
##  3rd Qu.: 6.000  
##  Max.   :55.000  
##  NA's   :613

wkhtotp: Hours normally worked a week in main job overtime included, partner

##     wkhtotp      
##  Min.   :  0.00  
##  1st Qu.: 26.00  
##  Median : 36.00  
##  Mean   : 35.15  
##  3rd Qu.: 40.00  
##  Max.   :100.00  
##  NA's   :955

eiscedf: Father’s highest level of education

##     eiscedf      
##  Min.   : 1.000  
##  1st Qu.: 2.000  
##  Median : 2.000  
##  Mean   : 3.408  
##  3rd Qu.: 5.000  
##  Max.   :55.000  
##  NA's   :234

eiscedm: Mother’s highest level of education

##     eiscedm      
##  Min.   : 1.000  
##  1st Qu.: 1.000  
##  Median : 2.000  
##  Mean   : 3.052  
##  3rd Qu.: 3.000  
##  Max.   :55.000  
##  NA's   :145

So we have: 4 ordinal variables include missing values and 3 numerical variables include missing values.

# "IE" "AT" "PT" "HU" "PL"
#summary(cntrydatalist$AT$hinctnta)

AT <- cntrydatalist$AT
IE <- cntrydatalist$IE
PT <- cntrydatalist$PT
HU <- cntrydatalist$HU
PL <- cntrydatalist$PL

barplot(table(IE$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
        horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in AT")

barplot(table(AT$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
        horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in IE")

barplot(table(PT$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
        horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in PT")

barplot(table(HU$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
        horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in HU")

barplot(table(PL$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
        horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in PL")

AT$hinctnta[AT$hinctnta == 88] <- NA
AT$hinctnta[AT$hinctnta == 77] <- NA

IE$hinctnta[IE$hinctnta == 88] <- NA
IE$hinctnta[IE$hinctnta == 77] <- NA

PT$hinctnta[PT$hinctnta == 88] <- NA
PT$hinctnta[PT$hinctnta == 77] <- NA

HU$hinctnta[HU$hinctnta == 88] <- NA
HU$hinctnta[HU$hinctnta == 77] <- NA

PL$hinctnta[PL$hinctnta == 88] <- NA
PL$hinctnta[PL$hinctnta == 77] <- NA
na_mat <- matrix(c(sum(is.na(AT$hinctnta)), sum(is.na(IE$hinctnta)), 
                   sum(is.na(PT$hinctnta)), sum(is.na(HU$hinctnta)), sum(is.na(PL$hinctnta))), 1, 5)
colnames(na_mat) <- c("AT", "IE", "PT", "HU", "PL")
barplot(na_mat, main = "The number of missing values of hinctnta")

per_na_mat <- matrix(c(sum(is.na(AT$hinctnta))/dim(AT)[1], sum(is.na(IE$hinctnta))/dim(IE)[1], 
                   sum(is.na(PT$hinctnta))/dim(PT)[1], sum(is.na(HU$hinctnta))/dim(HU)[1],
                   sum(is.na(PL$hinctnta))/dim(PL)[1]), 1, 5)
colnames(per_na_mat) <- c("AT", "IE", "PT", "HU", "PL")
barplot(per_na_mat, main = "The proportion of missing values of hinctnta in each country", ylim = c(0, 1.0))

Missing data patterns are not shown as they are uninformative.

To solven missingness in the income variable (hinctnta), we will use multiple imputation. However, since this variable is reported in deciles, imputation will not be straightforward. Ryder et al. (2011) recommend using the midpoint for each class as a surrogate to use for imputation. Furthermore, Donnelly and Pop-Eleches (2018) recommend to use the lower bound of the 10th category plus the width of category 9 as a surrogate for the highest decile. Since deciles differ across countries, this will be done seperately for each country.

# Create decile objects and join for imputation.
# Changing the levels of hinctnta to be the median (the middle of the category) of the deciles

# Austria
AT_deciles <- cbind(1:10, c(7650, 18200, 23400, 28350, 34050, 40150, 47300, 56000, 69050, 94400)) %>%
  as.data.frame()
colnames(AT_deciles) <- c("hinctnta", "income")
AT_deciles$income <- as.numeric(AT_deciles$income) # make numeric

AT <- AT %>% left_join(AT_deciles, by = "hinctnta") # add income surrogate

# Ireland
IE_deciles <- cbind(1:10, c(135, 327.50, 447.5, 572.5, 710, 857.5, 1022.5, 1227.5, 1510, 2020)) %>% 
  as.data.frame()
colnames(IE_deciles) <- c("hinctnta", "income")
IE_deciles$income <- as.numeric(IE_deciles$income) # make numeric

IE <- IE %>% left_join(IE_deciles, by = "hinctnta") # add income surrogate

# Hungary
HU_deciles <- cbind(1:10, c(6500, 149500, 184500, 214500, 244500, 274500, 304500, 339500, 384500, 450000)) %>%
  as.data.frame()
colnames(HU_deciles) <- c("hinctnta", "income")
HU_deciles$income <- as.numeric(HU_deciles$income) # make numeric

HU <- HU %>% left_join(HU_deciles, by = "hinctnta") # add income surrogate

# Portugal
PT_deciles <- cbind(1:10, c(2818, 6709, 8847.5, 11265, 13885, 16556.5, 19728, 23948, 30566.5, 44143)) %>%
  as.data.frame()
colnames(PT_deciles) <- c("hinctnta", "income")
PT_deciles$income <- as.numeric(PT_deciles$income) # make numeric

PT <- PT %>% left_join(PT_deciles, by = "hinctnta") # add income surrogate

# Poland
PL_deciles <- cbind(1:10, c(850, 2000.5, 3650.5, 3300.5, 3950.5, 4650.5, 5450.5, 6450.5, 7900.5, 10600)) %>%
  as.data.frame()
colnames(PL_deciles) <- c("hinctnta", "income")
PL_deciles$income <- as.numeric(PL_deciles$income) # make numeric

PL <- PL %>% left_join(PL_deciles, by = "hinctnta") # add income surrogate

Recode all relevant variables used for imputation model (missingness and variable levels)

# Clean important variables chosen for the imputation model
# Define missing values and recode variables for the model

# Austria
AT$eisced[AT$eisced == 55] <- NA 
AT$eisced <- factor(AT$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
AT$bthcld[AT$bthcld == 1] <- 0
AT$bthcld[AT$bthcld == 2] <- 1
AT$dscrgrp[AT$dscrgrp == 1] <- 0
AT$dscrgrp[AT$dscrgrp == 2] <- 1

AT$bthcld[AT$bthcld != 0 & AT$bthcld != 1] <- NA
AT$maritalb[!(AT$maritalb %in% c(1:6))] <- NA
AT$lrscale[!(AT$lrscale %in% c(0:10))] <- NA
AT$dscrgrp[AT$dscrgrp != 0 & AT$dscrgrp != 1] <- NA
AT$hhmmb[AT$hhmmb %in% c(77, 88)] <- NA
AT$agea[AT$agea == 999] <- NA

AT$bthcld <- as.factor(AT$bthcld)
AT$maritalb <- as.factor(AT$maritalb)
AT$dscrgrp <- as.factor(AT$dscrgrp)

# Hungary
HU$eisced[HU$eisced == 55] <- NA 
HU$eisced <- factor(HU$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
HU$bthcld[HU$bthcld == 1] <- 0
HU$bthcld[HU$bthcld == 2] <- 1
HU$dscrgrp[HU$dscrgrp == 1] <- 0
HU$dscrgrp[HU$dscrgrp == 2] <- 1

HU$bthcld[HU$bthcld != 0 & HU$bthcld != 1] <- NA
HU$maritalb[!(HU$maritalb %in% c(1:6))] <- NA
HU$lrscale[!(HU$lrscale %in% c(0:10))] <- NA
HU$dscrgrp[HU$dscrgrp != 0 & HU$dscrgrp != 1] <- NA
HU$hhmmb[HU$hhmmb %in% c(77, 88)] <- NA
HU$agea[HU$agea == 999] <- NA

HU$bthcld <- as.factor(HU$bthcld)
HU$maritalb <- as.factor(HU$maritalb)
HU$dscrgrp <- as.factor(HU$dscrgrp)


# Ireland
IE$eisced[IE$eisced == 55] <- NA 
IE$eisced <- factor(IE$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
IE$bthcld[IE$bthcld == 1] <- 0
IE$bthcld[IE$bthcld == 2] <- 1
IE$dscrgrp[IE$dscrgrp == 1] <- 0
IE$dscrgrp[IE$dscrgrp == 2] <- 1

IE$bthcld[IE$bthcld != 0 & IE$bthcld != 1] <- NA
IE$maritalb[!(IE$maritalb %in% c(1:6))] <- NA
IE$lrscale[!(IE$lrscale %in% c(0:10))] <- NA
IE$dscrgrp[IE$dscrgrp != 0 & IE$dscrgrp != 1] <- NA
IE$hhmmb[IE$hhmmb %in% c(77, 88)] <- NA
IE$agea[IE$agea == 999] <- NA

IE$bthcld <- as.factor(IE$bthcld)
IE$maritalb <- as.factor(IE$maritalb)
IE$dscrgrp <- as.factor(IE$dscrgrp)


# Portugal
PT$eisced[PT$eisced == 55] <- NA 
PT$eisced <- factor(PT$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
PT$bthcld[PT$bthcld == 1] <- 0
PT$bthcld[PT$bthcld == 2] <- 1
PT$dscrgrp[PT$dscrgrp == 1] <- 0
PT$dscrgrp[PT$dscrgrp == 2] <- 1

PT$bthcld[PT$bthcld != 0 & PT$bthcld != 1] <- NA
PT$maritalb[!(PT$maritalb %in% c(1:6))] <- NA
PT$lrscale[!(PT$lrscale %in% c(0:10))] <- NA
PT$dscrgrp[PT$dscrgrp != 0 & PT$dscrgrp != 1] <- NA
PT$hhmmb[PT$hhmmb %in% c(77, 88)] <- NA
PT$agea[PT$agea == 999] <- NA

PT$bthcld <- as.factor(PT$bthcld)
PT$maritalb <- as.factor(PT$maritalb)
PT$dscrgrp <- as.factor(PT$dscrgrp)


# Poland
PL$eisced[PL$eisced == 55] <- NA 
PL$eisced <- factor(PL$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
PL$bthcld[PL$bthcld == 1] <- 0
PL$bthcld[PL$bthcld == 2] <- 1
PL$dscrgrp[PL$dscrgrp == 1] <- 0
PL$dscrgrp[PL$dscrgrp == 2] <- 1

PL$bthcld[PL$bthcld != 0 & PL$bthcld != 1] <- NA
PL$maritalb[!(PL$maritalb %in% c(1:6))] <- NA
PL$lrscale[!(PL$lrscale %in% c(0:10))] <- NA
PL$dscrgrp[PL$dscrgrp != 0 & PL$dscrgrp != 1] <- NA
PL$hhmmb[PL$hhmmb %in% c(77, 88)] <- NA
PL$agea[PL$agea == 999] <- NA

PL$bthcld <- as.factor(PL$bthcld)
PL$maritalb <- as.factor(PL$maritalb)
PL$dscrgrp <- as.factor(PL$dscrgrp)

Make subset of data with variables to be used. We will use 9 variables in addition to the analysis weight so we can weight the data during imputation

# Create variable vector containing the names of relevant variables
variables <- c("pdwrk", "bthcld", "gndr", "maritalb","lrscale", "dscrgrp", "hhmmb", "agea", "wkhtot", "anweight", "income", "eisced")

# Select subsets of data with relevant variables
AT_sub <- AT %>% select(variables)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(variables)
## 
##   # Now:
##   data %>% select(all_of(variables))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
HU_sub <- HU %>% select(variables)

IE_sub <- IE %>% select(variables)

PT_sub <- PT %>% select(variables)

PL_sub <- PL %>% select(variables)

Create interactions with weight variabels with covariates in imputation model

# Interactions with anweight for Portugal
PT_sub$anweight_pdwrk <- PT_sub$anweight * PT_sub$pdwrk
PT_sub$anweight_bthcld <- PT_sub$anweight * as.numeric(PT_sub$bthcld)
PT_sub$anweight_lrscale <- PT_sub$anweight * PT_sub$lrscale
PT_sub$anweight_dscrgrp <- PT_sub$anweight * as.numeric(PT_sub$dscrgrp)
PT_sub$anweight_hhmmb <-PT_sub$anweight * PT_sub$hhmmb
PT_sub$anweight_agea <- PT_sub$anweight * PT_sub$agea
PT_sub$anweight_wkhtot <- PT_sub$anweight * PT_sub$wkhtot
PT_sub$anweight_income <- PT_sub$anweight *PT_sub$income
PT_sub$anweight_eisced <- PT_sub$anweight * as.numeric(PT_sub$eisced)

# Interactions with anweight for Austria
AT_sub$anweight_pdwrk <- AT_sub$anweight * AT_sub$pdwrk
AT_sub$anweight_bthcld <- AT_sub$anweight * as.numeric(AT_sub$bthcld)
AT_sub$anweight_lrscale <- AT_sub$anweight * AT_sub$lrscale
AT_sub$anweight_dscrgrp <- AT_sub$anweight * as.numeric(AT_sub$dscrgrp)
AT_sub$anweight_hhmmb <- AT_sub$anweight * AT_sub$hhmmb
AT_sub$anweight_agea <- AT_sub$anweight * AT_sub$agea
AT_sub$anweight_wkhtot <- AT_sub$anweight * AT_sub$wkhtot
AT_sub$anweight_income <- AT_sub$anweight * AT_sub$income
AT_sub$anweight_eisced <- AT_sub$anweight * as.numeric(AT_sub$eisced)

# Interactions with anweight for Hungary
HU_sub$anweight_pdwrk <- HU_sub$anweight * HU_sub$pdwrk
HU_sub$anweight_bthcld <- HU_sub$anweight * as.numeric(HU_sub$bthcld)
HU_sub$anweight_lrscale <- HU_sub$anweight * HU_sub$lrscale
HU_sub$anweight_dscrgrp <-HU_sub$anweight * as.numeric(HU_sub$dscrgrp)
HU_sub$anweight_hhmmb <- HU_sub$anweight * HU_sub$hhmmb
HU_sub$anweight_agea <- HU_sub$anweight * HU_sub$agea
HU_sub$anweight_wkhtot <- HU_sub$anweight * HU_sub$wkhtot
HU_sub$anweight_income <- HU_sub$anweight * HU_sub$income
HU_sub$anweight_eisced <- HU_sub$anweight * as.numeric(HU_sub$eisced)

# Interactions with anweight for Ireland
IE_sub$anweight_pdwrk <- IE_sub$anweight * IE_sub$pdwrk
IE_sub$anweight_bthcld <- IE_sub$anweight * as.numeric(IE_sub$bthcld)
IE_sub$anweight_lrscale <- IE_sub$anweight * IE_sub$lrscale
IE_sub$anweight_dscrgrp <- IE_sub$anweight * as.numeric(IE_sub$dscrgrp)
IE_sub$anweight_hhmmb <- IE_sub$anweight * IE_sub$hhmmb
IE_sub$anweight_agea <- IE_sub$anweight * IE_sub$agea
IE_sub$anweight_wkhtot <- IE_sub$anweight * IE_sub$wkhtot
IE_sub$anweight_income <- IE_sub$anweight * IE_sub$income
IE_sub$anweight_eisced <- IE_sub$anweight * as.numeric(IE_sub$eisced)

# Interactions with anweight for Poland
PL_sub$anweight_pdwrk <- PL_sub$anweight * PL_sub$pdwrk
PL_sub$anweight_bthcld <- PL_sub$anweight * as.numeric(PL_sub$bthcld)
PL_sub$anweight_lrscale <- PL_sub$anweight * PL_sub$lrscale
PL_sub$anweight_dscrgrp <- PL_sub$anweight * as.numeric(PL_sub$dscrgrp)
PL_sub$anweight_hhmmb <- PL_sub$anweight * PL_sub$hhmmb
PL_sub$anweight_agea <- PL_sub$anweight * PL_sub$agea
PL_sub$anweight_wkhtot <-  PL_sub$anweight * PL_sub$wkhtot
PL_sub$anweight_income <- PL_sub$anweight * PL_sub$income
PL_sub$anweight_eisced <- PL_sub$anweight * as.numeric(PL_sub$eisced)

Make methods

# Create methods for imputation models (this is the same for every country)
meth <- make.method(AT_sub)

Make predictor matrix

# Predictor matrix for Portugal
PT_pred <- quickpred(PT_sub)
PT_pred[, 'income'] <- 1
PT_pred['income', 'income'] <- 0

# Predictor matrix for Austria
AT_pred <- quickpred(AT_sub)
AT_pred[, 'income'] <- 1
AT_pred['income', 'income'] <- 0

# Predictor matrix for Ireland
IE_pred <- quickpred(IE_sub)
IE_pred[, 'income'] <- 1
IE_pred['income', 'income'] <- 0

# Predictor matrix Hungary 
HU_pred <- quickpred(HU_sub)
HU_pred[, 'income'] <- 1
HU_pred['income', 'income'] <- 0

# Predictor matrix for Poland
PL_pred <- quickpred(PL_sub)
PL_pred[, 'income'] <- 1
PL_pred['income', 'income'] <- 0

Imputation per country

# Imputation for Portugal
vis_miss(PT_sub) # Imputing m sets according to amount of percentage missing in income
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
##   Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.

PT_imp <- mice(PT_sub,
               m = 20,
               maxit = 10, 
               method = meth,
               predictorMatrix = PT_pred,
               seed = 12345,
               print = FALSE)
## Warning: Number of logged events: 2600
# Imputation for Poland
vis_miss(PL_sub) # Imputing m sets according to amount of percentage missing in income

PL_imp <- mice(PL_sub,
               m = 39,
               maxit = 10, 
               method = meth,
               predictorMatrix = PL_pred,
               seed = 12345,
               print = FALSE)

# Imputation for Hungary
vis_miss(HU_sub) # Imputing m sets according to amount of percentage missing in income

HU_imp <- mice(HU_sub,
               m = 40,
               maxit = 10, 
               method = meth,
               predictorMatrix = HU_pred,
               seed = 12345,
               print = FALSE)

# Imputation for Ireland
vis_miss(IE_sub) # Imputing m sets according to amount of percentage missing in income

IE_imp <- mice(IE_sub,
               m = 28,
               maxit = 10, 
               method = meth,
               predictorMatrix = IE_pred,
               seed = 12345,
               print = FALSE)

# Imputation for Austria
vis_miss(AT_sub) # Imputing m sets according to amount of percentage missing in income

AT_imp <- mice(AT_sub,
               m = 18,
               maxit = 10, 
               method = meth,
               predictorMatrix = AT_pred,
               seed = 12345,
               print = FALSE)

Checking convergence

# Checking convergence for Portugal
plot(PT_imp)

densityplot(PT_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
##     ggplot2::geom_density() 
## 
## See amices.org/ggmice for more info.

bwplot(PT_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
##     ggplot2::geom_boxplot() 
## 
## See amices.org/ggmice for more info.

# Outcome is midpoint of the median income class
PT_imp %>% 
  complete('long') %>% 
  with(tapply(income, .imp, median)) %>% 
  median()
## [1] 13885
# Checking convergence for Poland
plot(PL_imp)

densityplot(PL_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
##     ggplot2::geom_density() 
## 
## See amices.org/ggmice for more info.

bwplot(PL_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
##     ggplot2::geom_boxplot() 
## 
## See amices.org/ggmice for more info.

# Outcome is midpoint of the median income class
PL_imp %>% 
  complete('long') %>% 
  with(tapply(income, .imp, median)) %>% 
  median()
## [1] 3950.5
# Checking convergence for Ireland
plot(IE_imp)

densityplot(IE_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
##     ggplot2::geom_density() 
## 
## See amices.org/ggmice for more info.

bwplot(IE_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
##     ggplot2::geom_boxplot() 
## 
## See amices.org/ggmice for more info.

# Outcome is midpoint of the median income class
IE_imp %>% 
  complete('long') %>% 
  with(tapply(income, .imp, median)) %>% 
  median()
## [1] 572.5
# Checking convergence for Hungary
plot(HU_imp)

densityplot(HU_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
##     ggplot2::geom_density() 
## 
## See amices.org/ggmice for more info.

bwplot(HU_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
##     ggplot2::geom_boxplot() 
## 
## See amices.org/ggmice for more info.

# Outcome is midpoint of the median income class
HU_imp %>% 
  complete('long') %>% 
  with(tapply(income, .imp, median)) %>% 
  median()
## [1] 214500
# Checking convergence for Austria
plot(AT_imp)

densityplot(AT_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
##     ggplot2::geom_density() 
## 
## See amices.org/ggmice for more info.

bwplot(AT_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run: 
## 
##     ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
##     ggplot2::geom_boxplot() 
## 
## See amices.org/ggmice for more info.

# Outcome is midpoint of the median income class
AT_imp %>% 
  complete('long') %>% 
  with(tapply(income, .imp, median)) %>% 
  median()
## [1] 34050

Donnelly, M. J., & Pop-Eleches, G. (2018). Income measures in cross-national surveys: problems and solutions. Political Science Research and Methods, 6(2), 355-363.